clear
set more off


// MARRIAGE DISTRIBUTION FOR STRUCTURAL ESTIMATION


* Load the marriage-level file and give the survey-year variable a short name.
use "$data\M_final_dataset.dta" , clear
rename y_survey year

* Education
*
* Two indicators per spouse (h = husband, w = wife):
*   <s>college = 1 when <s>_educ is 2 or lower
*   <s>high    = 1 when <s>_educ is 3 or lower
* NOTE(review): this presumes lower education codes mean higher attainment;
* check against the codebook output printed below.
* NOTE(review): a missing education code compares as larger than any number,
* so it evaluates to 0 in both indicators. Confirm that is acceptable.

foreach s in h w {
    codebook `s'_educ
    gen `s'college = `s'_educ<=2
    lab var `s'college "`s' laurea"
    gen `s'high = `s'_educ<=3
    lab var `s'high "`s' high school"
}

* One-way frequency of each indicator.
foreach v of varlist hcollege hhigh wcollege whigh {
    tab `v'
}

* Age
*
* Builds a six-level age category for each spouse (hagec, wagec) from the
* age variables h_agem and w_agem.

* Report, then remove, records where either spouse is under 18.
* A missing age is not below 18, so it survives these two drops.
count if h_agem<18
count if w_agem<18
drop if h_agem<18
drop if w_agem<18

lab define agec 1 "24-" 2 "25-29" 3 "30-34" 4 "35-39" 5 "40-44" 6 "45+"

foreach s in h w {
    tab `s'_agecat
    gen `s'agec = .
    * Half-open bins: a non-integer age such as 24.5 still gets a category.
    replace `s'agec = 1 if `s'_agem <  25
    replace `s'agec = 2 if `s'_agem >= 25 & `s'_agem < 30
    replace `s'agec = 3 if `s'_agem >= 30 & `s'_agem < 35
    replace `s'agec = 4 if `s'_agem >= 35 & `s'_agem < 40
    replace `s'agec = 5 if `s'_agem >= 40 & `s'_agem < 45
    * Missing compares as larger than any number, so the open-ended top bin
    * must exclude it explicitly or unknown ages would be counted as 45+.
    replace `s'agec = 6 if `s'_agem >= 45 & !missing(`s'_agem)
    lab values `s'agec agec
}

* Records with an unknown age for either spouse cannot be placed in a cell.
* Report how many there are and drop them, as is done for unknown origin.
count if missing(hagec, wagec)
drop if missing(hagec, wagec)

tab hagec
tab wagec

* Origin
* Native flags: 1 when the birth-country code is 0, and 0 otherwise.
* NOTE(review): a missing hbc or wbc also evaluates to 0 here; confirm.
gen hnative= hbc==0
tab hnative
gen wnative= wbc==0
tab wnative

* Treatment
*
* For each spouse the birth-country code is looked up in CodiciStatiEsteri.dta
* to obtain the area code EUII, which is then collapsed into the eight-level
* origin group used in the estimation (worigin for wives, horigin for
* husbands).

* Value labels are defined once, before the loop. Defining the same label a
* second time without the replace option stops the do-file with an error.
lab define eui3 0 "ITA" 1 "EU15"  2 "EU2004" 3 "EU2007" 4 "EU_Other" 5 "Africa" 6 "Asia" 7 "America_South" 8 "OECD"
lab define lorigin 0 "italian" 1 "eu" 2 "eu10+eu2" 3 "euother" 4 "africa" 5 "asia" 6 "americas" 7 "oecd"

foreach s in w h {
    gen nat = `s'bc

    * keep(master match) prevents the lookup table from appending rows for
    * countries that never appear in the marriage records.
    merge m:1 nat using "$data\CodiciStatiEsteri.dta" , keepusing(EUII) keep(master match)
    tab _merge
    drop _merge

    * Manual overrides for individual country codes.
    replace EUII=4 if inlist(nat, 224, 258, 259, 260, 261)
    replace EUII=6 if nat==317
    replace EUII=0 if nat==0
    label values EUII eui3

    * Area code to origin group: EU2004 and EU2007 are pooled into group 2,
    * so every later area shifts down by one.
    gen `s'origin=.
    replace `s'origin=0 if EUII==0
    replace `s'origin=1 if EUII==1
    replace `s'origin=2 if EUII==2 | EUII==3
    replace `s'origin=3 if EUII==4
    replace `s'origin=4 if EUII==5
    replace `s'origin=5 if EUII==6
    replace `s'origin=6 if EUII==7
    replace `s'origin=7 if EUII==8
    lab values `s'origin lorigin

    drop EUII nat
}

* Inspect the result and discard marriages where either origin is unknown.
codebook worigin
codebook horigin
drop if worigin==.
drop if horigin==.


* Select sample of provinces and years: up to 2002, and 2007 onwards

* Keep only the 26 provinces of residence listed here.
keep if inlist(provres, 15, 58,  1, 63, 82, 37, 48, 27, 32, ///
                        10, 79, 76, 66, 42, 70, 72, 92, 22, ///
                        54,  7, 17, 65, 87, 28, 16, 23)

* Drop the years 2003 to 2006; before = 1 marks years up to and including 2002.
* NOTE(review): a record with a missing year is kept and gets before = 0.
drop if inrange(year, 2003, 2006)
gen before = year<=2002

* One row per cell (husband type x wife type x province x period);
* nm is the number of marriages in the cell.
local cellvars hhigh hagec horigin whigh wagec worigin provres before
gen nm = 1
collapse (sum) nm , by(`cellvars')

* Keep marriages to recover individuals (males and females) available in the marriage market
*
* For each side, sum the marriage counts over the partner's characteristics
* and over provinces, then save the totals by own type and period:
*   marriedwomen.dta holds nmwomen by whigh, wagec, worigin, before
*   marriedmen.dta   holds nmmen   by hhigh, hagec, horigin, before
* The cell-level data in memory are left untouched.

foreach side in women men {
    local p = cond("`side'" == "women", "w", "h")

    preserve
    collapse (sum) nm , by(`p'high `p'agec `p'origin before)
    rename nm nm`side'
    save "$data\married`side'.dta" , replace
    restore
}

* Spread the marriage counts into one column per wife origin group.
*
* The columns are built directly from the origin code rather than from
* tabulate with the generate option. That option numbers its dummies by the
* categories present in the data, so an origin group with no marriages in the
* selected sample would shift or break the column names.
* The order of the names in WORIGIN follows origin codes 0 to 7.

global WORIGIN w_italian w_eu w_eu12 w_euother w_africa w_asia w_america w_oecd

local code = 0
foreach var in $WORIGIN {
    * nm for rows of this wife origin, 0 for every other row.
    gen double `var' = nm * (worigin == `code')
    local ++code
}

* One row per husband type x wife age and education x province x period.
collapse (sum) $WORIGIN , by( hhigh hagec horigin whigh wagec provres before)

save "$data\M_dataset_maritaldistribution.dta" , replace

* Write the marital distribution to CSV, one file per period.
* export needs the delimited subcommand to produce a text file. The if
* qualifier selects the period without changing the data in memory.
export delimited using "$data\All_Marriages_Before.csv" if before==1 , replace
export delimited using "$data\All_Marriages_After.csv"  if before==0 , replace




// PREPARE INDIVIDUAL VECTORS (MALES AND FEMALES) FOR STRUCTURAL ESTIMATION

********************************************************************************
*  Figure A3: Distribution of population vectors, by age, education, and
*  area of origin
*
*    Panel b: women by age and education   -> Women_age_educ
*    Panel d: women by origin              -> Women_origin
*    Panel a: men by age and education     -> Men_age_educ
*    Panel c: men by origin                -> Men_origin
*
*  Each panel adds the married counts saved earlier to the census counts of
*  singles, expresses the totals in thousands, and plots the before period
*  next to the after period.
********************************************************************************

* Pool the two census files once. The 2011 file is tagged before = 0 and the
* appended 2001 rows are then tagged before = 1.
use "$data\census2011_singledistribution.dta", clear
gen before =0
append using "$data\census2001_singledistribution.dta"
replace before=1 if before==.
drop y
save "$data\census_distribution.dta", replace

foreach sex in women men {

    * Names that differ by sex: variable prefix in the married file, count
    * variable, and the word used in titles and file names.
    if "`sex'" == "women" {
        local pfx w
        local cnt female
        local ttl Women
    }
    else {
        local pfx h
        local cnt male
        local ttl Men
    }

    * Married individuals of this sex, renamed to the census variable names,
    * stacked on top of the census singles.
    use "$data\married`sex'.dta" , clear
    rename nm`sex'     `cnt'
    rename `pfx'high   high
    rename `pfx'agec   agec
    rename `pfx'origin origin
    append using "$data\census_distribution.dta"

    * ---- Age and education panel (all origins pooled) ----
    preserve
    collapse (sum) `cnt' , by( high agec before )
    replace `cnt'=`cnt'/1000
    reshape wide `cnt' , i(high agec) j(before)
    rename `cnt'0 `cnt'_after
    rename `cnt'1 `cnt'_before
    lab define agel 1 "22" 2 "27" 3 "32" 4 "37" 5 "42" 6 "50" , replace
    lab values agec agel
    lab define edul 0 "Low" 1 "High" , replace
    lab values high edul

    gr bar `cnt'_before `cnt'_after , over(agec) over(high) ///
        legend(off) bar(1, color(dknavy)) bar(2, color(cranberry)) ///
        title("`ttl' by Age and Education")
    * A forward slash is used before the local macro because a backslash
    * placed directly in front of a macro reference suppresses its expansion.
    gr save   "$output/`ttl'_age_educ" , replace
    gr export "$output/`ttl'_age_educ.pdf" , replace
    restore

    * ---- Origin panel (origin code 0, labelled italian, is left out) ----
    collapse (sum) `cnt' , by( origin before )
    replace `cnt'=`cnt'/1000
    reshape wide `cnt' , i(origin) j(before)
    rename `cnt'0 `cnt'_after
    rename `cnt'1 `cnt'_before
    drop if origin==0

    gr bar `cnt'_before `cnt'_after , over(origin) ///
        legend(off) bar(1, color(dknavy)) bar(2, color(cranberry)) ///
        title("`ttl' by Origin")
    gr save   "$output/`ttl'_origin" , replace
    gr export "$output/`ttl'_origin.pdf" , replace
}





